Clean up environment and load motor vehicle thefts data into mvt
# Start from an empty workspace and work out of the course directory.
rm(list = ls())
setwd("~/EdX/AE")
# mvt = MotorVehicleThefts; text columns are kept as character, not factors.
mvt <- read.csv(
  file = "./Data/mvt.csv",
  stringsAsFactors = FALSE
)
Parse dates in “12/31/12 22:00” format and extract the weekday and hour
# Replace the timestamp text with parsed date-times
# (month/day/two-digit year, then 24-hour hour:minute).
mvt$Date <- strptime(x = mvt$Date, format = "%m/%d/%y %H:%M")
# Name of the weekday for every record.
mvt$Weekday <- weekdays(x = mvt$Date)
# strptime() yields a list-based date-time, so the hour is read off as a field.
mvt$Hour <- mvt$Date$hour
Count thefts per weekday, save the counts as a data frame, and plot them with ggplot
library("ggplot2")
# Tally the records per weekday name; the resulting data frame is plotted
# through its Var1 (day) and Freq (count) columns.
WeekdayCounts <- as.data.frame(table(mvt$Weekday))
# group = 1 puts every day on one line even though the x axis is categorical.
ggplot(data = WeekdayCounts, mapping = aes(x = Var1, y = Freq)) +
  geom_line(mapping = aes(group = 1))
Convert Var1 to orderedFactor and replot
# Give the day names an explicit Sunday-to-Saturday order so the x axis of
# the plot follows the week.
WeekdayCounts$Var1 <- factor(
  WeekdayCounts$Var1,
  levels = c(
    "Sunday", "Monday", "Tuesday", "Wednesday",
    "Thursday", "Friday", "Saturday"
  ),
  ordered = TRUE
)
ggplot(data = WeekdayCounts, mapping = aes(x = Var1, y = Freq)) +
  geom_line(mapping = aes(group = 1))
Plot Line Styles
# Three renderings of the weekday totals that differ only in line style.
# All of them use the same axis labels.

# Default solid line with descriptive axis labels.
ggplot(data = WeekdayCounts, aes(x = Var1, y = Freq)) +
  geom_line(aes(group = 1)) +
  xlab("Day of the Week") +
  ylab("Total Motor Vehicle Thefts")

# Dashed line (linetype = 2).
ggplot(data = WeekdayCounts, aes(x = Var1, y = Freq)) +
  geom_line(aes(group = 1), linetype = 2) +
  ggtitle("linetype=2") +
  xlab("Day of the Week") +
  ylab("Total Motor Vehicle Thefts")

# Mostly transparent line (alpha = 0.3).
ggplot(data = WeekdayCounts, aes(x = Var1, y = Freq)) +
  geom_line(aes(group = 1), alpha = 0.3) +
  ggtitle("alpha=0.3") +
  xlab("Day of the Week") +
  ylab("Total Motor Vehicle Thefts")
Determine the number of thefts per hour by tabulating Weekday against Hour, convert the table to a data frame with a numeric Hour column, and plot
# Cross-tabulate the records: one row per weekday, one column per hour.
table(mvt$Weekday, mvt$Hour)
##
## 0 1 2 3 4 5 6 7 8 9 10 11
## Friday 1873 932 743 560 473 602 839 1203 1268 1286 938 822
## Monday 1900 825 712 527 415 542 772 1123 1323 1235 971 737
## Saturday 2050 1267 985 836 652 508 541 650 858 1039 946 789
## Sunday 2028 1236 1019 838 607 461 478 483 615 864 884 787
## Thursday 1856 816 696 508 400 534 799 1135 1298 1301 932 731
## Tuesday 1691 777 603 464 414 520 845 1118 1175 1174 948 786
## Wednesday 1814 790 619 469 396 561 862 1140 1329 1237 947 763
##
## 12 13 14 15 16 17 18 19 20 21 22 23
## Friday 1207 857 937 1140 1165 1318 1623 1652 1736 1881 2308 1921
## Monday 1129 824 958 1059 1136 1252 1518 1503 1622 1815 2009 1490
## Saturday 1204 767 963 1086 1055 1084 1348 1390 1570 1702 2078 1750
## Sunday 1192 789 959 1037 1083 1160 1389 1342 1706 1696 2079 1584
## Thursday 1093 752 831 1044 1131 1258 1510 1537 1668 1776 2134 1579
## Tuesday 1108 762 908 1071 1090 1274 1553 1496 1696 1816 2044 1458
## Wednesday 1225 804 863 1075 1076 1289 1580 1507 1718 1748 2093 1511
# Same counts in long form: Var1 = weekday, Var2 = hour, Freq = count.
DayHourCounts <- as.data.frame(table(mvt$Weekday, mvt$Hour))
# Var2 is a factor; go through character first so the numbers are the hour
# labels themselves rather than the factor's internal codes.
DayHourCounts$Hour <- as.numeric(as.character(DayHourCounts$Var2))
# One line per weekday across the hours of the day.
ggplot(data = DayHourCounts, mapping = aes(x = Hour, y = Freq)) +
  geom_line(mapping = aes(group = Var1))
Plot options
# Hourly theft counts with one thick line per weekday, coloured by day.
# NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines;
# `size` is kept here so the script still runs on older ggplot2 releases.
ggplot(DayHourCounts, aes(x = Hour, y = Freq)) +
  geom_line(aes(group = Var1, color = Var1), size = 2)

# Separate the weekends from the weekdays:
DayHourCounts$Type <- ifelse(
  DayHourCounts$Var1 %in% c("Saturday", "Sunday"),
  "Weekend",
  "Weekday"
)

# Still one line per day, but coloured by weekend/weekday. The x axis is the
# hour, so it is labelled as such.
ggplot(data = DayHourCounts, aes(x = Hour, y = Freq)) +
  geom_line(aes(group = Var1, color = Type), size = 2) +
  ggtitle("Segregate Weekends Colored by Type") +
  xlab("Hour of the Day") +
  ylab("Total Motor Vehicle Thefts")

# Same plot with alpha = 0.2 so overlapping lines remain visible.
ggplot(data = DayHourCounts, aes(x = Hour, y = Freq)) +
  geom_line(aes(group = Var1, color = Type), size = 2, alpha = 0.2) +
  ggtitle("Segregate Weekends Colored by Type") +
  xlab("Hour of the Day") +
  ylab("Total Motor Vehicle Thefts")
Define Mon-Sun order for days, and create heatmaps
# Put the weekdays in Monday-to-Sunday order for the heatmap's y axis.
DayHourCounts$Var1 <- factor(
  DayHourCounts$Var1,
  levels = c(
    "Monday", "Tuesday", "Wednesday", "Thursday",
    "Friday", "Saturday", "Sunday"
  ),
  ordered = TRUE
)

# Heatmap: hour across, weekday down, tile fill driven by the theft count.
ggplot(data = DayHourCounts, mapping = aes(x = Hour, y = Var1)) +
  geom_tile(mapping = aes(fill = Freq)) +
  ggtitle("frequency displayed as fill intensity")

# Same heatmap with a named legend and no title on the y axis.
ggplot(data = DayHourCounts, mapping = aes(x = Hour, y = Var1)) +
  geom_tile(mapping = aes(fill = Freq)) +
  scale_fill_gradient(name = "Total MV Thefts") +
  ggtitle("with scale_fill_gradient") +
  theme(axis.title.y = element_blank())

# Change the color scheme - define what is dark/light
ggplot(data = DayHourCounts, mapping = aes(x = Hour, y = Var1)) +
  geom_tile(mapping = aes(fill = Freq)) +
  scale_fill_gradient(name = "Total MV Thefts", low = "white", high = "red") +
  ggtitle("define low crime = white") +
  theme(axis.title.y = element_blank())
Load and display Chicago map
library("maps")
library("ggmap")
# Fetch a base map for the location "chicago" at zoom level 11; the logged
# messages below show that this call goes out to a web map service.
chicago <- get_map(
  location = "chicago",
  zoom = 11
)
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=chicago&zoom=11&size=640x640&scale=2&maptype=terrain&language=en-EN&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=chicago&sensor=false
# Draw the downloaded map on its own.
ggmap(chicago)
Plot first 100 motor vehicle thefts on map
# Overlay the first 100 rows of mvt on the Chicago base map, one point each.
ggmap(chicago) +
  geom_point(
    data = mvt[seq_len(100), ],
    mapping = aes(x = Longitude, y = Latitude)
  ) +
  ggtitle("Motor Vehicle Thefts In Chicago")
## Warning in loop_apply(n, do.ply): Removed 7 rows containing missing values
## (geom_point).
Count thefts by area (coordinates rounded to two decimals) in a LatLonCounts data frame with numeric longitude/latitude columns, and plot the counts on the map
# Count thefts per location cell: coordinates are rounded to two decimals and
# cross-tabulated, giving one row per (longitude, latitude) combination.
LatLonCounts <- as.data.frame(
  table(round(mvt$Longitude, 2), round(mvt$Latitude, 2))
)
# The tabulated coordinates come back as factors (Var1 = longitude,
# Var2 = latitude); convert via character to recover the numeric values.
LatLonCounts$Long <- as.numeric(as.character(LatLonCounts$Var1))
LatLonCounts$Lat <- as.numeric(as.character(LatLonCounts$Var2))

# One point per cell; both colour and size follow the theft count.
ggmap(chicago) +
  geom_point(
    data = LatLonCounts,
    aes(x = Long, y = Lat, color = Freq, size = Freq)
  ) +
  ggtitle("Motor Vehicle Thefts In Chicago")
## Warning in loop_apply(n, do.ply): Removed 615 rows containing missing
## values (geom_point).

# Same plot on a yellow (low) to red (high) colour scale. The title uses "\n"
# for its line break; a backslash followed by a space is not a valid escape
# in an R string and stops the script from parsing.
ggmap(chicago) +
  geom_point(
    data = LatLonCounts,
    aes(x = Long, y = Lat, color = Freq, size = Freq)
  ) +
  ggtitle("scale_color_gradient\nyellow=low; red=high") +
  scale_colour_gradient(low = "yellow", high = "red")
## Warning in loop_apply(n, do.ply): Removed 615 rows containing missing
## values (geom_point).

# Heatmap-style view: red tiles whose opacity follows the theft count.
ggmap(chicago) +
  geom_tile(
    data = LatLonCounts,
    aes(x = Long, y = Lat, alpha = Freq),
    fill = "red"
  )
Remove grid cells with zero thefts (such as locations over water)
# Keep only the cells in which at least one theft was counted.
LatLonCounts2 <- LatLonCounts[LatLonCounts$Freq > 0, ]
# Numeric coordinates derived from the factor columns of the kept rows.
LatLonCounts2$Long <- as.numeric(as.character(LatLonCounts2$Var1))
LatLonCounts2$Lat <- as.numeric(as.character(LatLonCounts2$Var2))
# Red tiles on the base map, with opacity following the theft count.
ggmap(chicago) +
  ggtitle("Chicago has no water crime") +
  geom_tile(
    data = LatLonCounts2,
    mapping = aes(x = Long, y = Lat, alpha = Freq),
    fill = "red"
  )
Clean up environment and load murder data and US map
# Drop every object from the first half of the script before the murder
# analysis starts.
rm(list = ls())
murders <- read.csv(file = "./Data/murders.csv")
# Outline data for the "state" map; plotted through its long, lat and group
# columns.
statesMap <- map_data("state")
# Blank map: white fill, black outlines.
ggplot(data = statesMap, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "white", color = "black")
Merge statesMap data and murder data, and plot
# Create a new variable called region with the lowercase names to match the statesMap:
murders$region <- tolower(murders$State)

# Attach the per-state figures to every outline point of that state.
# merge() does not keep the points in drawing order, so the rows are put back
# in sequence with the map data's `order` column before any polygon is drawn.
murderMap <- merge(statesMap, murders, by = "region")
murderMap <- murderMap[order(murderMap$order), ]

# Number of murders per state.
ggplot(murderMap, aes(x = long, y = lat, group = group, fill = Murders)) +
  ggtitle("Murders in the US by State") +
  geom_polygon(color = "black") +
  scale_fill_gradient(low = "black", high = "red", guide = "legend")

# Population per state.
ggplot(murderMap, aes(x = long, y = lat, group = group, fill = Population)) +
  ggtitle("Population in US") +
  geom_polygon(color = "black") +
  scale_fill_gradient(low = "black", high = "red", guide = "legend")

# Murders per 100,000 population.
murderMap$MurderRate <- murderMap$Murders / murderMap$Population * 100000
ggplot(murderMap, aes(x = long, y = lat, group = group, fill = MurderRate)) +
  ggtitle("Murder Rate") +
  geom_polygon(color = "black") +
  scale_fill_gradient(low = "white", high = "red", guide = "legend")

# Same map with the fill scale limited to 0-10, so rates above 10 fall outside
# the scale (per the title, this is meant to take DC out of the picture).
# The outline colour is passed as `color`; `white` is not a geom_polygon()
# parameter.
ggplot(murderMap, aes(x = long, y = lat, group = group, fill = MurderRate)) +
  ggtitle("Murders in the US sans DC") +
  geom_polygon(color = "black") +
  scale_fill_gradient(
    low = "white", high = "red", guide = "legend", limits = c(0, 10)
  )

# Gun ownership relative to population, scaled by 100,000.
# NOTE(review): this treats GunOwnership as a count; confirm that the column
# is not already a proportion, in which case dividing by Population is wrong.
murderMap$GunRate <- murderMap$GunOwnership / murderMap$Population * 100000
ggplot(murderMap, aes(x = long, y = lat, group = group, fill = GunRate)) +
  ggtitle("Gun Ownership by State") +
  geom_polygon(color = "black") +
  scale_fill_gradient(low = "white", high = "red", guide = "legend")

# Log of the same ratio (overwrites GunRate), drawn with white outlines.
murderMap$GunRate <- log(murderMap$GunOwnership / murderMap$Population)
ggplot(murderMap, aes(x = long, y = lat, group = group, fill = GunRate)) +
  ggtitle("Rate of Gun Ownership") +
  geom_polygon(color = "white") +
  scale_fill_gradient(low = "black", high = "red", guide = "legend")